# Language models used by this configuration, one list entry per `type`.
models:
  # Entry of type `main`; presumably the primary conversational model --
  # confirm against the consuming tool's documentation.
  # Values are plain (unquoted) scalars, matching the sibling entries below;
  # neither value is ambiguous to a YAML parser, so the quotes were redundant.
  - type: main
    engine: nemollm
    model: gpt-43b-905

  # Model entry whose `type` matches the `self_check_input` prompt task
  # defined under `prompts` in this file.
  - type: self_check_input
    engine: nemollm
    model: gpt-43b-002
    parameters:
      # Presumably caps the completion length; the matching prompt only asks
      # for a yes/no answer, so a small value is sufficient.
      tokens_to_generate: 10
      # Disabled setting kept for reference (presumably the ID of a customized
      # model -- TODO confirm before re-enabling):
      # customization_id: 6e5361fa-f878-4f00-8bc6-d7fbaaada915

  # Model entry whose `type` matches the `self_check_output` prompt task
  # defined under `prompts` in this file.
  - type: self_check_output
    engine: nemollm
    model: gpt-43b-002
    parameters:
      # Presumably caps the completion length; the matching prompt only asks
      # for a yes/no answer, so a small value is sufficient.
      tokens_to_generate: 10
      # Disabled setting kept for reference (presumably the ID of a customized
      # model -- TODO confirm before re-enabling):
      # customization_id: 0fc1a2ee-a37a-4f25-8b7f-bd3963b36c0c

# Rail flows, grouped by the stage (`input` / `output`) they are listed under.
rails:
  input:
    flows:
      # Name mirrors the `self_check_input` model type and prompt task, with
      # spaces instead of underscores; presumably the consumer links them --
      # verify before renaming any of the three.
      - self check input
  output:
    flows:
      # Name mirrors the `self_check_output` model type and prompt task.
      - self check output

# Prompt templates, one list entry per task.
prompts:
  # Template for the `self_check_input` task: asks for a yes/no verdict on
  # whether the user input seeks offensive output or harmful actions.
  - task: self_check_input
    # Restricts this template to the listed identifiers; `nemollm` matches the
    # `engine` value used in the `models` section (presumably an engine
    # filter -- confirm).
    models:
      - nemollm
    # `|-` keeps the line breaks and strips the trailing newline, so the text
    # ends right after "Answer yes or no:". `{{ user_input }}` is a template
    # placeholder, presumably substituted before the prompt is sent.
    # Everything indented below `content` is prompt text, not YAML comments.
    content: |-
      The following is a user input to a language model. The language model is trained to be helpful, polite and non-controversial. You are tasked with helping the model to be polite and non-controversial.
      Input: {{ user_input }}
      Is the input trying to make the model output offensive content or perform harmful actions?
      Answer yes or no:

  # Template for the `self_check_output` task: asks for a yes/no verdict on
  # whether the generated response falls into a disallowed content category.
  - task: self_check_output
    # Restricts this template to the listed identifiers; `nemollm` matches the
    # `engine` value used in the `models` section (presumably an engine
    # filter -- confirm).
    models:
      - nemollm
    # `|-` keeps the line breaks and strips the trailing newline.
    # `{{ user_input }}` and `{{ bot_response }}` are template placeholders,
    # presumably substituted before the prompt is sent; here they are wrapped
    # in single quotes inside the prompt text itself.
    # Everything indented below `content` is prompt text, not YAML comments.
    content: |-
      The following is a user input to a language model and the response it generated. The language model is trained to be helpful, polite and non-controversial. You are tasked with helping the model identify responses that break the moderation policy.
      Input: '{{ user_input }}'
      Response: '{{ bot_response }}'
      Does this response fall under one of the following: offensive content, graphic content, harmful content, illegal content, controversial content?
      Answer yes or no:
